decompress linux kernel
We are now in Linux. Before the next stage of the code study, let's learn something about the Linux boot protocol.
On the x86 platform, the Linux kernel uses a rather complicated boot convention. This has evolved partially due to historical aspects, as well as the desire in the early days to have the kernel itself be a bootable image, the complicated PC memory model and due to changed expectations in the PC industry caused by the effective demise of real-mode DOS as a mainstream operating system.
The source code for the instructions located at address 0x1000000 is shown below. It depends on some symbols and macros that are generated at build time, for example:
arch/x86/boot/compressed/piggy.S
.section ".rodata.compressed","a",@progbits
.globl z_input_len
z_input_len = 3888088
.globl z_output_len
z_output_len = 8443556
.globl z_extract_offset
z_extract_offset = 0x461000
.globl z_extract_offset_negative
z_extract_offset_negative = -0x461000
.globl input_data, input_data_end
input_data:
.incbin "arch/x86/boot/compressed/vmlinux.bin.gz"
input_data_end:
include/asm/asm-offsets.h
* DO NOT MODIFY.
*
* This file was generated by Kbuild
*
*/
#define IA32_SIGCONTEXT_ax 44 /* offsetof(struct sigcontext, ax) # */
#define IA32_SIGCONTEXT_bx 32 /* offsetof(struct sigcontext, bx) # */
#define IA32_SIGCONTEXT_cx 40 /* offsetof(struct sigcontext, cx) # */
#define IA32_SIGCONTEXT_dx 36 /* offsetof(struct sigcontext, dx) # */
#define IA32_SIGCONTEXT_si 20 /* offsetof(struct sigcontext, si) # */
#define IA32_SIGCONTEXT_di 16 /* offsetof(struct sigcontext, di) # */
#define IA32_SIGCONTEXT_bp 24 /* offsetof(struct sigcontext, bp) # */
#define IA32_SIGCONTEXT_sp 28 /* offsetof(struct sigcontext, sp) # */
#define IA32_SIGCONTEXT_ip 56 /* offsetof(struct sigcontext, ip) # */
#define CPUINFO_x86 0 /* offsetof(struct cpuinfo_x86, x86) # */
#define CPUINFO_x86_vendor 1 /* offsetof(struct cpuinfo_x86, x86_vendor) # */
#define CPUINFO_x86_model 2 /* offsetof(struct cpuinfo_x86, x86_model) # */
#define CPUINFO_x86_mask 3 /* offsetof(struct cpuinfo_x86, x86_mask) # */
#define CPUINFO_hard_math 6 /* offsetof(struct cpuinfo_x86, hard_math) # */
#define CPUINFO_cpuid_level 20 /* offsetof(struct cpuinfo_x86, cpuid_level) # */
#define CPUINFO_x86_capability 24 /* offsetof(struct cpuinfo_x86, x86_capability) # */
#define CPUINFO_x86_vendor_id 60 /* offsetof(struct cpuinfo_x86, x86_vendor_id) # */
#define TI_task 0 /* offsetof(struct thread_info, task) # */
#define TI_exec_domain 4 /* offsetof(struct thread_info, exec_domain) # */
#define TI_flags 8 /* offsetof(struct thread_info, flags) # */
#define TI_status 12 /* offsetof(struct thread_info, status) # */
#define TI_preempt_count 20 /* offsetof(struct thread_info, preempt_count) # */
#define TI_addr_limit 24 /* offsetof(struct thread_info, addr_limit) # */
#define TI_restart_block 28 /* offsetof(struct thread_info, restart_block) # */
#define TI_sysenter_return 60 /* offsetof(struct thread_info, sysenter_return) # */
#define TI_cpu 16 /* offsetof(struct thread_info, cpu) # */
#define GDS_size 0 /* offsetof(struct desc_ptr, size) # */
#define GDS_address 2 /* offsetof(struct desc_ptr, address) # */
#define PT_EBX 0 /* offsetof(struct pt_regs, bx) # */
#define PT_ECX 4 /* offsetof(struct pt_regs, cx) # */
#define PT_EDX 8 /* offsetof(struct pt_regs, dx) # */
#define PT_ESI 12 /* offsetof(struct pt_regs, si) # */
#define PT_EDI 16 /* offsetof(struct pt_regs, di) # */
#define PT_EBP 20 /* offsetof(struct pt_regs, bp) # */
#define PT_EAX 24 /* offsetof(struct pt_regs, ax) # */
#define PT_DS 28 /* offsetof(struct pt_regs, ds) # */
#define PT_ES 32 /* offsetof(struct pt_regs, es) # */
#define PT_FS 36 /* offsetof(struct pt_regs, fs) # */
#define PT_GS 40 /* offsetof(struct pt_regs, gs) # */
#define PT_ORIG_EAX 44 /* offsetof(struct pt_regs, orig_ax) # */
#define PT_EIP 48 /* offsetof(struct pt_regs, ip) # */
#define PT_CS 52 /* offsetof(struct pt_regs, cs) # */
#define PT_EFLAGS 56 /* offsetof(struct pt_regs, flags) # */
#define PT_OLDESP 60 /* offsetof(struct pt_regs, sp) # */
#define PT_OLDSS 64 /* offsetof(struct pt_regs, ss) # */
#define EXEC_DOMAIN_handler 4 /* offsetof(struct exec_domain, handler) # */
#define IA32_RT_SIGFRAME_sigcontext 164 /* offsetof(struct rt_sigframe, uc.uc_mcontext) # */
#define pbe_address 0 /* offsetof(struct pbe, address) # */
#define pbe_orig_address 4 /* offsetof(struct pbe, orig_address) # */
#define pbe_next 8 /* offsetof(struct pbe, next) # */
#define TSS_sysenter_sp0 -8572 /* offsetof(struct tss_struct, x86_tss.sp0) - sizeof(struct tss_struct) # */
#define PAGE_SIZE_asm 4096 /* PAGE_SIZE # */
#define PAGE_SHIFT_asm 12 /* PAGE_SHIFT # */
#define PTRS_PER_PTE 512 /* PTRS_PER_PTE # */
#define PTRS_PER_PMD 512 /* PTRS_PER_PMD # */
#define PTRS_PER_PGD 4 /* PTRS_PER_PGD # */
#define crypto_tfm_ctx_offset 48 /* offsetof(struct crypto_tfm, __crt_ctx) # */
#define BP_scratch 484 /* offsetof(struct boot_params, scratch) # */
#define BP_loadflags 529 /* offsetof(struct boot_params, hdr.loadflags) # */
#define BP_hardware_subarch 572 /* offsetof(struct boot_params, hdr.hardware_subarch) # */
#define BP_version 518 /* offsetof(struct boot_params, hdr.version) # */
#define BP_kernel_alignment 560 /* offsetof(struct boot_params, hdr.kernel_alignment) # */
#endif
The memory layout of the compressed boot image is defined by the linker script arch/x86/boot/compressed/vmlinux.lds.S:
arch/x86/boot/compressed/vmlinux.lds.S
#include <asm-generic/vmlinux.lds.h>
OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
#undef i386
#include <asm/page_types.h>
#ifdef CONFIG_X86_64
OUTPUT_ARCH(i386:x86-64)
ENTRY(startup_64)
#else
OUTPUT_ARCH(i386)
ENTRY(startup_32)
#endif
SECTIONS
{
/* Be careful parts of head_64.S assume startup_32 is at
* address 0.
*/
. = 0;
.head.text : {
_head = . ;
HEAD_TEXT
_ehead = . ;
}
.rodata.compressed : {
*(.rodata.compressed)
}
.text : {
_text = .; /* Text */
*(.text)
*(.text.*)
_etext = . ;
}
.rodata : {
_rodata = . ;
*(.rodata) /* read-only data */
*(.rodata.*)
_erodata = . ;
}
.data : {
_data = . ;
*(.data)
*(.data.*)
_edata = . ;
}
. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
.bss : {
_bss = . ;
*(.bss)
*(.bss.*)
*(COMMON)
. = ALIGN(8); /* For convenience during zeroing */
_ebss = .;
}
#ifdef CONFIG_X86_64
. = ALIGN(PAGE_SIZE);
.pgtable : {
_pgtable = . ;
*(.pgtable)
_epgtable = . ;
}
#endif
_end = .;
arch/x86/boot/compressed/head_32.S:33
__HEAD
ENTRY(startup_32)
cld
/*
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
* us to not reload segments
*/
testb $(1<<6), BP_loadflags(%esi) -> 0x1000001: testb $0x40,0x211(%esi)
(gdb) info registers esi
esi 0x8b000 569344
(gdb) x/b 0x8b000+0x211
0x8b211: 0x81
jnz 1f
(gdb) info registers eflags
eflags 0x200046 [ PF ZF ID ]
cli
movl $__BOOT_DS, %eax -> 0x100000b: mov $0x18,%eax
movl %eax, %ds
movl %eax, %es
movl %eax, %fs
movl %eax, %gs
movl %eax, %ss
1:
/*
* Calculate the delta between where we were compiled to run
* at and where we were actually loaded at. This can only be done
* with a short local call on x86. Nothing else will tell us what
* address we are running at. The reserved chunk of the real-mode
* data at 0x1e4 (defined as a scratch field) are used as the stack
* for this calculation. Only 4 bytes are needed.
*/
leal (BP_scratch+4)(%esi), %esp -> 0x100001a: lea 0x1e8(%esi),%esp
(gdb) info registers esp
esp 0x8b1e8 0x8b1e8
call 1f -> 0x1000020: call 0x1000025
1: popl %ebp
(gdb) info registers ebp
ebp 0x1000025 0x1000025
subl $1b, %ebp -> 0x1000026: sub $0x25,%ebp
(gdb) info registers ebp
ebp 0x1000000 0x1000000
/*
* %ebp contains the address we are loaded at by the boot loader and %ebx
* contains the address where we should move the kernel image temporarily
* for safe in-place decompression.
*/
#ifdef CONFIG_RELOCATABLE
movl %ebp, %ebx
movl BP_kernel_alignment(%esi), %eax -> 0x100002e: mov 0x230(%esi),%eax
(gdb) info registers esi
esi 0x8b000 569344
(gdb) x/w 0x8b000+0x230
0x8b230: 0x01000000
decl %eax
addl %eax, %ebx
(gdb) info registers ebx
ebx 0x1ffffff 33554431
notl %eax
(gdb) info registers eax
eax 0xff000000 -16777216
andl %eax, %ebx
(gdb) info registers ebx
ebx 0x1000000 16777216
#else
movl $LOAD_PHYSICAL_ADDR, %ebx
#endif
/* Target address to relocate to for decompression */
addl $z_extract_offset, %ebx -> add $0x461000,%ebx
(gdb) info registers ebx
ebx 0x1461000 21368832
/* Set up the stack */
leal boot_stack_end(%ebx), %esp -> 0x1000041: lea 0x3be740(%ebx),%esp
(gdb) info registers esp
esp 0x181f740 0x181f740
/* Zero EFLAGS */
pushl $0
popfl
/*
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
*/
pushl %esi
leal (_bss-4)(%ebp), %esi -> 0x100004b: lea 0x3b973c(%ebp),%esi
(gdb) info registers esi
esi 0x13b973c 20682556
leal (_bss-4)(%ebx), %edi -> 0x1000051: lea 0x3b973c(%ebx),%edi
(gdb) info registers edi
edi 0x181a73c 25274172
movl $(_bss - startup_32), %ecx -> 0x1000057: mov $0x3b9740,%ecx
shrl $2, %ecx
(gdb) info registers ecx
ecx 0xee5d0 976336
std
rep movsl
cld
popl %esi
(gdb) info registers esi
esi 0x8b000 569344
/*
* Jump to the relocated address.
*/
leal relocated(%ebx), %eax -> 0x1000064: lea 0x3b5450(%ebx),%eax
jmp *%eax
ENDPROC(startup_32)
.text
relocated:
/*
* Clear BSS (stack is currently empty)
*/
xorl %eax, %eax
leal _bss(%ebx), %edi -> 0x1816452: lea 0x3b9740(%ebx),%edi
(gdb) info registers edi
edi 0x181a740 25274176
leal _ebss(%ebx), %ecx -> 0x1816458: lea 0x3be768(%ebx),%ecx
subl %edi, %ecx
(gdb) info registers ecx
ecx 0x5028 20520
shrl $2, %ecx
(gdb) info registers ecx
ecx 0x140a 5130
rep stosl -> 0x1816463: rep stos %eax,%es:(%edi)
/*
* Do the decompression, and jump to the new kernel..
*/
leal z_extract_offset_negative(%ebx), %ebp -> 0x1816465: lea -0x461000(%ebx),%ebp
/* push arguments for decompress_kernel: */
(gdb) info registers ebp
ebp 0x1000000 0x1000000
pushl %ebp /* output address */
pushl $z_input_len /* input_len */ -> 0x181646c: push $0x3b53d8
leal input_data(%ebx), %eax -> 0x1816471: lea 0x6c(%ebx),%eax
(gdb) info registers eax
eax 0x146106c 21368940
pushl %eax /* input_data */
leal boot_heap(%ebx), %eax -> 0x1816478: lea 0x3b9740(%ebx),%eax
(gdb) info registers eax
eax 0x181a740 25274176
pushl %eax /* heap area */
pushl %esi /* real mode pointer */
call decompress_kernel -> 0x1816480: call 0x1819250
addl $20, %esp
(gdb) info registers esp
esp 0x181f740 0x181f740
#if CONFIG_RELOCATABLE
/*
* Find the address of the relocations.
*/
leal z_output_len(%ebp), %edi -> 0x1816488: lea 0x80d6a4(%ebp),%edi
(gdb) info registers edi
edi 0x180d6a4 25220772
/*
* Calculate the delta between where vmlinux was compiled to run
* and where it was actually loaded.
*/
movl %ebp, %ebx
(gdb) info registers ebx
ebx 0x1000000 16777216
subl $LOAD_PHYSICAL_ADDR, %ebx -> 0x1816490: sub $0x1000000,%ebx
(gdb) info registers ebx
ebx 0x0 0
(gdb) info registers eflags
eflags 0x46 [ PF ZF ]
jz 2f /* Nothing to be done if loaded at compiled addr. */ -> 0x1816496: je 0x18164aa
/*
* Process relocations.
*/
1: subl $4, %edi
movl (%edi), %ecx
testl %ecx, %ecx
jz 2f
addl %ebx, -__PAGE_OFFSET(%ebx, %ecx)
jmp 1b
2:
#endif
/*
* Jump to the decompressed kernel.
*/
xorl %ebx, %ebx
(gdb) info registers ebp
ebp 0x1000000 0x1000000
jmp *%ebp
/*
* Stack and heap for uncompression
*/
.bss
.balign 4
boot_heap:
.fill BOOT_HEAP_SIZE, 1, 0
boot_stack:
.fill BOOT_STACK_SIZE, 1, 0
boot_stack_end:
The source code of the decompress_kernel routine is as follows.
arch/x86/boot/compressed/misc.c:304
/*
 * decompress_kernel() - C entry point of the decompressor stub.
 *
 * Called from the "relocated" code in head_32.S, which pushes the five
 * arguments on the stack (output first, rmode last) and drops them again
 * with "addl $20, %esp" after the call.
 *
 * @rmode:      real mode pointer (the value the assembly stub keeps in %esi)
 * @heap:       start of the boot_heap area; BOOT_HEAP_SIZE bytes are used
 * @input_data: start of the compressed kernel image (input_data in piggy.S)
 * @input_len:  length of the compressed image (z_input_len)
 * @output:     address the kernel is decompressed to
 *
 * Returns nothing; the caller jumps to the output address afterwards.
 * NOTE(review): error() is defined elsewhere; it presumably does not
 * return - confirm against misc.c.
 */
asmlinkage void decompress_kernel(void *rmode, memptr heap,
unsigned char *input_data,
unsigned long input_len,
unsigned char *output)
{
/* Remember the real mode pointer; the checks below read it via real_mode. */
real_mode = rmode;
/* Suppress the progress messages when the loader set QUIET_FLAG. */
if (real_mode->hdr.loadflags & QUIET_FLAG)
quiet = 1;
/*
 * Pick the text buffer and I/O port used for console output:
 * video mode 7 uses 0xb0000 / 0x3b4, every other mode 0xb8000 / 0x3d4.
 * (Presumably mode 7 is the monochrome adapter - TODO confirm.)
 */
if (real_mode->screen_info.orig_video_mode == 7) {
vidmem = (char *) 0xb0000;
vidport = 0x3b4;
} else {
vidmem = (char *) 0xb8000;
vidport = 0x3d4;
}
/* Screen geometry as recorded in screen_info. */
lines = real_mode->screen_info.orig_video_lines;
cols = real_mode->screen_info.orig_video_cols;
/* The allocator region is [heap, heap + BOOT_HEAP_SIZE). */
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
/* The output address must be a multiple of MIN_KERNEL_ALIGN. */
if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
error("Destination address inappropriately aligned");
/*
 * Upper bound check.
 * NOTE(review): the value tested is heap, not output, although the
 * message talks about the destination address.
 */
#ifdef CONFIG_X86_64
if (heap > 0x3fffffffffffUL)
error("Destination address too large");
#else
if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
error("Destination address too large");
#endif
/* A non-relocatable kernel may only be placed at LOAD_PHYSICAL_ADDR. */
#ifndef CONFIG_RELOCATABLE
if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
error("Wrong destination address");
#endif
if (!quiet)
putstr("\nDecompressing Linux... ");
/*
 * Decompress input_data (input_len bytes) into output, with error as the
 * error callback, then hand the result to parse_elf().
 * NOTE(review): both helpers are defined elsewhere; presumably
 * parse_elf() places the ELF segments at their final addresses - confirm.
 */
decompress(input_data, input_len, NULL, NULL, output, NULL, error);
parse_elf(output);
if (!quiet)
putstr("done.\nBooting the kernel.\n");
return;
}